{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.10466863 0.11766361 0.1245063  0.10769962 0.12454455]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd \n",
    "import numpy as np \n",
    "import seaborn as sns\n",
    "import matplotlib \n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import skew\n",
    "from scipy.stats.stats import pearsonr\n",
    "\n",
    "# 读取数据集\n",
    "train = pd.read_csv('train.csv')\n",
    "test = pd.read_csv('test.csv')\n",
    "\n",
    "# 处理 SalePrice \n",
    "prices = pd.DataFrame({'price':train['SalePrice'], 'log(price+1)':np.log1p(train['SalePrice'])})\n",
    "# prices.hist()\n",
    "train['SalePrice'] = np.log1p(train['SalePrice'])\n",
    "\n",
    "# GrLivArea \n",
    "train['GrLivArea'] = train['GrLivArea'][train['GrLivArea']<4500]\n",
    "\n",
    "# 合并训练集和测试机\n",
    "all_data = pd.concat((train.loc[:,\"MSSubClass\":\"SaleCondition\"],\n",
    "                     test.loc[:, \"MSSubClass\":\"SaleCondition\"]))\n",
    "\n",
    "# 处理 偏斜>0.5\n",
    "numeric_feats = all_data.dtypes[all_data.dtypes!='object'].index\n",
    "skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))\n",
    "skewed_feats = skewed_feats[skewed_feats > 0.5]\n",
    "skewed_feats = skewed_feats.index\n",
    "all_data[skewed_feats] = np.log1p(all_data[skewed_feats])\n",
    "\n",
    "# 转换成虚拟变量\n",
    "all_data = pd.get_dummies(all_data)\n",
    "all_data = all_data.fillna(all_data.mean())\n",
    "\n",
    "X_train = all_data[:train.shape[0]]\n",
    "X_test = all_data[train.shape[0]:]\n",
    "y = train[\"SalePrice\"]\n",
    "\n",
    "\n",
    "\n",
    "## Models\n",
    "from sklearn.linear_model import Ridge, RidgeCV,ElasticNet, LassoCV, LassoLarsCV\n",
    "from sklearn.model_selection import cross_val_score, train_test_split\n",
    "from sklearn.preprocessing import StandardScaler, RobustScaler\n",
    "from sklearn.pipeline import make_pipeline \n",
    "\n",
    "def rmse_cv(model):\n",
    "    rmse = np.sqrt(-cross_val_score(model, X_train, y, scoring='neg_mean_squared_error', cv=5))\n",
    "    return rmse\n",
    "\n",
    "\n",
    "\"\"\"\n",
    "    Ridge\n",
    "\"\"\"\n",
    "# model_ridge = Ridge()\n",
    "# alphas = [0.03, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 100]\n",
    "# cv_ridge = [rmse_cv(Ridge(alpha=alpha)).mean() for alpha in alphas]\n",
    "\n",
    "# cv_ridge = pd.Series(cv_ridge, index=alphas)\n",
    "# cv_ridge.plot(title = 'Validation - Just Do it')\n",
    "# plt.xlabel('alpha')\n",
    "# plt.ylabel('rmse')\n",
    "\n",
    "\n",
    "\"\"\"\n",
    "    Lasso\n",
    "\"\"\"\n",
    "# alphas = [1,0.3,0.1,0.03,0.01,0.003,0.001,0.0003,0.0001,0.00003,0.00001,0.000003]\n",
    "# model_lasso = LassoCV(alphas=alphas, max_iter=50000).fit(X_train, y)\n",
    "# rmse_cv(model_lasso).mean()\n",
    "# preds = model_lasso.predict(X_test)\n",
    "# solution = pd.DataFrame({'id':test.Id, 'SalePrice':np.expm1(preds)})\n",
    "# solution.to_csv('linear_col.csv', index=False)\n",
    "\n",
    "# for alpha in alphas:\n",
    "#     model_lasso = LassoCV(alpha, max_iter=50000).fit(X_train, y)\n",
    "#     res = rmse_cv(model_lasso)\n",
    "#     print(alpha, res, res.mean())\n",
    "# model_lasso = LassoCV(0.00001, max_iter=50000).fit(X_train, y)\n",
    "# preds = model_lasso.predict(X_test)\n",
    "# solution = pd.DataFrame({'id':test.Id, 'SalePrice':np.expm1(preds)})\n",
    "# solution.to_csv('submission_2019_12_18.csv', index=False)\n",
    "\n",
    "model_lasso = make_pipeline(RobustScaler(), LassoCV(0.00001, max_iter=50000))\n",
    "model_lasso = model_lasso.fit(X_train, y)\n",
    "print(rmse_cv(model_lasso))\n",
    "preds = model_lasso.predict(X_test)\n",
    "solution = pd.DataFrame({'id':test.Id, 'SalePrice':np.expm1(preds)})\n",
    "solution.to_csv('submission_2019_12_18_2.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x2c36bc169e8>"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4MAAAJCCAYAAAB6VBJfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3XFspOd9H/jvI4pyKF9jyo6Seik5clOBrt1tvNbCUmGgOCd3ppxcI0KOEQu9WgiM0yFIr+k1YKu9GpCbOJAKHprUQGrAjd3I51SuHW9oNXHCE2wXxRmW6lUYh1FswrJbS5r1xUpXdH0WG9PUc3/sy/VyPUNyyNmd4byfD0Bw5sd3+D5LDJfzned5f0+ptQYAAIB2uWrYAwAAAODKEwYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBa6etgDGLQf+IEfqDfddNOwhwEAADAUjz/++J/XWq/f67ixC4M33XRTzpw5M+xhAAAADEUp5av7Oc4yUQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACgha4e9gAAAACOiqWVThaX13J2fSPHpqeyMDeb+RMzwx7WgQiDAAAA+7C00smp06vZ2NxKknTWN3Lq9GqSHMlAaJkoAADAPiwur10Igts2NreyuLw2pBEdjjAIAACwD2fXN/qqjzphEAAAYB+OTU/1VR91wiAAAMA+LMzNZmpyYkdtanIiC3OzQxrR4WggAwAAsA/bTWLGpZuomUEAAIAWMjMIAACwD7aWAAAAaCFbSwAAALSQrSUAAABayNYSAAAALWRrCQAAgBYat60lhEEAAIB9mj8xc2TD36UsEwUAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIX2DIOllNlSyh9d9PFfSyn/oJTy0lLKI6WULzWfr2uOL6WU95RSniyl/HEp5XUXfa+7m+O/VEq5+6L6LaWU1eYx7ymllKbe9RwAAAAczp5hsNa6Vmt9ba31tUluSfJ8kt9Jcm+ST9Zab07yyeZ+krw5yc3Nxz1J3pucD3ZJ7ktya5LXJ7nvonD33ubY7cfd3tR7nQMAAIBD6HeZ6I8n+XKt9atJ7kjyYFN/MMl8c/uOJB+s5z2aZLqU8vIkc0keqbWeq7U+l+SRJLc3X/v+Wutna601yQcv+V7dzgEAAMAh9BsG35bkoeb2D9Vav5YkzecfbOozSZ6+6DHPNLXd6s90qe92DgAAAA5h32GwlHJNkp9K8tG9Du1Sqweo71sp5Z5SyplSyplnn322n4cCAAC0Uj8zg29O8oe11j9r7v9Zs8QzzeevN/Vnktx40eNuSHJ2j/oNXeq7nWOHWuv7aq0na60nr7/++j7+SQAAAO3UTxi8K99dIpokDyfZ7gh6d5KPX1R/e9NV9LYk32iWeC4neVMp5bqmccybkiw3X/tmKeW2povo2y/5Xt3OAQAAwCFcvZ+DSinXJvkfk/yvF5UfSPKRUso7kjyV5K1N/RNJfiLJkznfefRnk6TWeq6U8stJPtcc90u11nPN7Z9L8ptJppL8fvOx2zkAAAA4hHK+gef4OHnyZD1z5sywhwEAADAUpZTHa60n9zqu326iAAAAjAFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWunrYAwAAADgqllY6WVxey9n1jRybnsrC3GzmT8wMe1gHIgwCAADsw9JKJ6dOr2ZjcytJ0lnfyKnTq0lyJAOhZaIAAAD7sLi8diEIbtvY3Mri8tqQRnQ4wiAAAMA+nF3f6Ks+6oRBAACAfTg2PdVXfdQJgwAAAPuwMDebqcmJHbWpyYkszM0OaUSHo4EMAADAPmw3idFNFAAAoGXmT8wc2fB3KctEAQAAWsjMIAAAjLFx2iSdwRIGAQBgTI3bJukMlmWiAAAwpsZtk3QGSxgEAIAxNW6bpDNYwiAAAIypcdskncESBgEAYEyN2ybpDJYGMgAAMKbGbZN0BksYBACAMTZOm6QzWPtaJlpKmS6l/HYp5YullC+UUv5mKeWlpZRHSilfaj5f1xxbSinvKaU8WUr541LK6y76Pnc3x3+plHL3RfVbSimrzWPeU0opTb3rOQAAADic/V4z+C+S/EGt9VVJfjTJF5Lcm+STtdabk3yyuZ8kb05yc/NxT5L3JueDXZL7ktya5PVJ7rso3L23OXb7cbc39V7nAAAA4BD2DIOllO9P8reSvD9Jaq3frrWuJ7kjyYPNYQ8mmW9u35Hkg/W8R5NMl1JenmQuySO11nO11ueSPJLk9uZr319r/WyttSb54CXfq9s5AAAAOIT9zAz+lSTPJvnXpZSVUspvlFJenOSHaq1fS5Lm8w82x88kefqixz/T1HarP9Olnl3OsUMp5Z5SyplSyplnn312H/8kAACAdttPGLw6yeuSvLfWeiLJt7L7cs3SpVYPUN+3Wuv7aq0na60nr7/++n4eCgAA0Er7CYPPJHmm1vpYc/+3cz4c/lmzxDPN569fdPyNFz3+hiRn96jf0KWeXc4BAADAIewZBmut/2+Sp0sp2ztT/niSP03ycJLtjqB3J/l4c/vhJG9vuoreluQbzRLP5SRvKqVc1zSOeVOS5eZr3yyl3NZ0EX37Jd+r2zkAAAA4hP3uM/i/JfmtUso1Sb6S5GdzPkh+pJTyjiRPJXlrc+wnkvxEkieTPN8cm1rruVLKLyf5XHPcL9VazzW3fy7JbyaZSvL7zUeSPNDjHAAAABxCOd/Ac3ycPHmynjlzZtjDAAAAGIpSyuO11pN7HbfffQYBAAAYI8IgAABACwmDAAAALbTfBjIAAMARtLTSyeLyWs6ub+TY9FQW5mYzf2Jm2MNiBAiDAAAwppZWOjl1ejUbm1tJks76Rk6dXk0SgRDLRAEAYFwtLq9dCILbNja3sri8NqQRMUqEQQAAGFNn1zf6qtMuwiAAAIypY9NTfdVpF2EQAADG1MLcbKYmJ3bUpiYnsjA3O6QRMUo0kAEAgDG13SRGN1G6EQYBAGCMzZ+YEf7oyjJRAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABa6OphDwAAALh8llY6WVxey9n1jRybnsrC3GzmT8wMe1iMAGEQAADG1NJKJ6dOr2ZjcytJ0lnfyKnTq0kiEB7QOIVry0QBAGBMLS6vXQiC2zY2t7K4vDakER1t2+G6s76Rmu+G66WVzrCHdiDCIAAAjKmz6xt91dnduIVrYRAAAMbUsempvursbtzCtTAIAABjamFuNlOTEztqU5MTWZibHdKIjrZxC9fCIAAAjKn5EzO5/87jmZmeSkkyMz2V++88fmQbngzbuIVr3UQBAGCMzZ+YEf4GZPvnOC7dRIVBAACAfRqncG2ZKAAAQAsJgwAAAC0kDAIAALSQMAgAANBCGsgAADBSllY6Y9OtEUaZMAgAwMhYWunk1OnVbGxuJUk66xs5dXo1SQRCGDDLRAEAGBmLy2sXguC2jc2tLC6vDWlEML6EQQAARsbZ9Y2+6sDBCYMAAIyMY9NTfdWBgxMGAQAYGQtzs5manNhRm5qcyMLc7JBGBONLAxkAAEbGdpMY3UTh8hMGAQAYKfMnZoQ/uAIsEwUAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABooX2FwVLKfy6lrJZS/qiUcqapvbSU8kgp5UvN5+uaeimlvKeU8mQp5Y9LKa+76Pvc3Rz/pVLK3RfVb2m+/5PNY8tu5wAAAPZnaaWTNzzwqbzy3t/LGx74VJZWOsMeEiOin5nBN9ZaX1trPdncvzfJJ2utNyf5ZHM/Sd6c5Obm454k703OB7sk9yW5Ncnrk9x3Ubh7b3Ps9uNu3+McAADAHpZWOjl1ejWd9Y3UJJ31jZw6vSoQkuRwy0TvSPJgc/vBJPMX1T9Yz3s0yXQp5eVJ5pI8Ums9V2t9LskjSW5vvvb9tdbP1lprkg9e8r26nQMAANjD4vJaNja3dtQ2NreyuLw2pBExSvYbBmuS/7uU8ngp5Z6m9kO11q8lSfP5B5v6TJKnL3rsM01tt/ozXeq7nQMAANjD2fWNvuq0y9X7PO4NtdazpZQfTPJIKeWLuxxbutTqAer71gTUe5LkFa94RT8PBQCAsXVseiqdLsHv2PTUEEbDqNnXzGCt9Wzz+etJfifnr/n7s2aJZ5rPX28OfybJjRc9/IYkZ/eo39Clnl3Ocen43ldrPVlrPXn99dfv558EAABjb2FuNlOTEztqU5MTWZibHdKIGCV7hsFSyotLKX9p+3aSNyX5kyQPJ9nuCHp3ko83tx9O8vamq+htSb7RLPFcTvKmUsp1TeOYNyVZbr72zVLKbU0X0bdf8r26nQMAANjD/ImZ3H/n8cxMT6UkmZmeyv13Hs/8iZk9H8v4288y0R9K8jvNbg9XJ/k3tdY/KKV8LslHSinvSPJUkrc2x38iyU8keTLJ80l+NklqredKKb+c5HPNcb9Uaz3X3P65JL+ZZCrJ7zcfSfJAj3MAAAD7MH9iRvijq3K+gef4OHnyZD1z5sywhwEAADAUpZTHL9oSsKfDbC0BAADAESUMAgAAtNB+t5YAAACOoKWVThaX13J2fSPHpqeyMDfrGkKSCIMAADC2llY6OXV6NRubW0mSzvpGTp1eTRKBEMtEAQBgXC0ur10Igts2NreyuLw2pBExSoRBAAAYU2fXN/qq0y7CIAAAjKlj01N91WkXYRAAAMbUwtxspiYndtSmJieyMDc7pBExSjSQAQCAMbXdJEY3UboRBgEAYIzNn5gR/gZonLbqEAYBAAD2Ydy26nDNIAAAwD6M21YdwiAAAMA+jNtWHcIgAADAPozbVh3CIAAAwD6M21YdwiAAAMA+zJ+YyVtumclEKUmSiVLylluObrdWYRAAAGAfllY6+befezpbtSZJtmrNv/3c01la6Qx5ZAcjDAIAAOzDP/13T2Rzq+6obW7V/NN/98SQRnQ4wiAAAMA+PPf8Zl/1UScMAgAAtJAwCAAAsA/TU5N91UedMAgAALAP7/qp12TyqrKjNnlVybt+6jVDGtHhXD3sAQAAABwF21tILC6v5ez6Ro5NT2VhbvbIbi0hDAIAAOzT/Imju6/gpSwTBQAAaCEzgwAAAPu0tNKxTBQAAKBNllY6OXV6NRubW0mSzvpGTp1eTZIjGQgtEwUAANiHxeW1C0Fw28bmVhaX14Y0osMRBgEAAPbh7PpGX/VRZ5koAACMsXG6xm3Yjk1PpdMl+B2bnhrCaA7PzCAAAIyp7WvcOusbqfnuNW5LK51hD+1IWpibzdTkxI7a1OREFuZmhzSiwxEGAQBgTI3bNW7DNn9iJvffeTwz01MpSWamp3L/nceP7EyrZaIAADCmxu0at1EwTpvOC4MAAIwU17gNzrhd48ZgWSYKAMDIcI3bYI3bNW6jYGmlkzc88Km88t7fyxse+NSRfm4KgwAAjAzXuA3W/ImZvOWWmUyUkiSZKCVvuWV8ljleaeP2ZoUwCADAyHCN22AtrXTyscc72ao1SbJVaz72eOfIhpdhG7c3K4RBAABGRq9r2VzjdjDjFl6GbdzerBAGAQAYGa5xG6xxCy/DNm5vVgiDAACMjHHbx23Yxi28DNu4vVkhDAIAwJgat/AybOP2ZoV9BgEAGBnb3Rq3r3Pb7taY5Mi+4B6m7Z+ZfRsHx6bzAABwGezW8GRcXoBfaeMUXhgsYRAAgJGh4QmjbmmlMzYzrcIgAAAj4yVTk1nf2Oxah2Ebt2XMGsgAADAySumvDlfSuO3bKAwCADAynnv+e2cFd6vDlTRuy5iFQQAARsZEjynAXnW4ksZt30ZhEACAkbFVa191uJLGbd9GYRAAgJEx02OGpVcdriSbzgMAwGWyMDe7o1tjcrRnXkbBOG2FMArGad9GYRAAgJGx/SJbeBmMcdsKgcESBgEAGCnjNPMybLttheBnjGsGAQBgTHV6bHnQq067mBkEAGCkuMZtcCZK6dqJ1VYdJMIgAAAjxDVug2WrDnZjmSgAACNjt2vc6J+tOtiNMAgAwMg42+Natl51djdum6QzWMIgAAAj41iPGatedXY3bpukM1iuGQQAYGS88VXX50OPPtW1zsHYqmOwxqnB0b5nBkspE6WUlVLK7zb3X1lKeayU8qVSyr8tpVzT1F/U3H+y+fpNF32PU019rZQyd1H99qb2ZCnl3ovqXc8BAMB4+t3Pf62vOlxJ2w2OOusbqflug6Ollc6wh3Yg/SwT/YUkX7jo/j9L8qu11puTPJfkHU39HUmeq7X+1SS/2hyXUsqrk7wtyWuS3J7kXzYBcyLJryd5c5JXJ7mrOXa3cwAAMIbWNzb7qsOVNG4NjvYVBkspNyT5ySS/0dwvSX4syW83hzyYZL65fUdzP83Xf7w5/o4kH661/kWt9T8leTLJ65uPJ2utX6m1fjvJh5Pcscc5AAAArqhxa3C035nBX0vyj5K80Nx/WZL1Wut3mvvPJNleKDuT5Okkab7+jeb4C/VLHtOrvts5AAAYQ9ddO9lXHa6kcWtwtGcYLKX8T0m+Xmt9/OJyl0PrHl8bVL3bGO8ppZwppZx59tlnux0CAMARcN/ffk1fdfb2zqXV/MipT+Sme38vP3LqE3nn0uqwh3RkjdtWHfuZGXxDkp8qpfznnF/C+WM5P1M4XUrZ7kZ6Q5Kzze1nktyYJM3XX5Lk3MX1Sx7Tq/7nu5xjh1rr+2qtJ2utJ6+/XqcpAICj6sxXz/VVZ3fvXFrNhx59Klv1/JzKVq350KNPCYQHNG5bdewZBmutp2qtN9Rab8r5BjCfqrX+nSSfTvLTzWF3J/l4c/vh5n6ar3+q1lqb+tuabqOvTHJzkv+Y5HNJbm46h17TnOPh5jG9zgEAwBh66LGn+6qzOz/PwZs/MZPP3Ptj+U8P/GQ+c++PHdkgmBxu0/l/nOQfllKezPnr+97f1N+f5GVN/R8muTdJaq1PJPlIkj9N8gdJfr7WutVcE/j3kiznfLfSjzTH7nYOAADG0PYM1n7r7M7Pk930tel8rfXfJ/n3ze2v5Hwn0EuP+W9J3trj8b+S5Fe61D+R5BNd6l3PAQAA7G2ilK7Bb6J0a89B2xxmZhAAABhhd916Y1912qWvmUEAALiczGQN1rvnjyc5f43gVq2ZKCV33XrjhTr9W1rpZHF5LWfXN3JseioLc7NH9rpBM4MAAIwMM1mDd/KHX5q//JLvS0nyl1/yfTn5wy8d9pCOrKWVThY++vl01jdSk3TWN7Lw0c9naaUz7KEdiJlBAIBDGKdZglFgJmuwllY6OXV6NRubW0nOh5dTp89vK+F52r93PfxENl/YOXO9+ULNux5+4kj+PIVBAIAD8kL78nj3/HHhb0AWl9cuPD+3bWxuZXF5zXP0ANY3NvuqjzphEADggLzQvjzMtg7O2fWNvuq0i2sGAQAOyAvtwduebb34mqxTp1eP7DVZw/aSqcm+6uzuumu7/9x61UedMAgAcEDHpqf6qrO33WZb6d+3v7PVV53d3fe3X5PJiZ2dbScnSu77268Z0ogORxgEADighbnZTE1O7KhNTU5kYW52SCM6+sy2Dtbzmy/0VWd38ydmsvjTP5qZ6amUJDPTU1n86R89ssuYXTMIAHBA8ydmcuar53Z0vnzLLTNH9oXhKDg2PZVOl+BntpVRMX9ifH7HzQwCABzQ0konH3u8c2GT9K1a87HHO65vO4SFudlMXLVzGd7EVcVsK1wGwiAAwAG5vm3wznz1XLYu2cdt64WaM189N6QRwfgSBgEADsj1bYP30GNP91UHDk4YBAA4IN1EB297ye1+6+xuuscWEr3qtIswCABwQLqJMupK6a9Ou+gmCgBwQNsdBReX13J2fSPHpqeyMDc7Np0GOfqee36zrzrtIgwCABzCOLWZHwUzPbaWmLH09kAmSum6xHbC1CCxTBQAgBFi6e1guQaT3QiDAACMjPkTM3nLLTMXZq4mSslbbjH7elC9ZlTNtJIIgwAAjJCllU4+9njnwszVVq352OOdLK10hjyyo2lhbjaTEzuXhE5OFDOtJBEGAQAYIYvLa9nY3NpR29jcyuLy2pBGNAYuXRFqhSgNYRAAgJHRrXnMbnV2t7i8ls0Xdqa/zReqcE0SYRAAgBHSq8ul7pcHc7ZHiO5Vp12EQQAARobul4N1rEejmF512kUYBABgZFx37WRfdXZnqw52Y9N5AABGRq8JQBODB7O9Jcfi8lrOrm/k2PRUFuZmbdVBEjODAACMkG9sbPZVBw7OzCAAACPjJVOTWe8S/F4yZZnoQSytdHLq9OqF7To66xs5dXo1ScwOYmYQAIDR8e3vbPVVZ3f2bWQ3wiAAACPj+c0X+qqzO1tLsBthEAAAxtR0jy6sveq0izAIAMDI6LW3vD3nD0Z3VnajgQwAwCEsrXS07R8g4WWwujXj2a1Ou5gZBAA4oO1OjZ31jdR8t1Pj0kpn2EM7smamp/qqs7uJHlOqveq0i5lBAIAD2q1To9nBg7n2mu5zFb3q7G6rx5Rqrzp7G6fVAH6rAAAOSKfGwfvS17/VV53dmWkdrHFbDSAMAgAc0LEeL6h71eFKW5ibzeRVO5eETl5VsjA3O6QRHW3jtm+jMAgAcEA3vax76OtVh2G4dEmoJaIHN26rAYRBAIADevQrz/VVhyvtXQ8/kRcuyX4v1PN1+jduqwGEQQCAA9KcY/B0vxwsW0sM1sLcbKYmJ3bUpiYnjuyyW2EQAOCABJfBu+vWG/uqw5U0f2Im9995PDPTUyk534jn/juPH9luoraWAAA4oLtuvTEfevSprnUO5t3zx5MkDz32dLZqzUQpuevWGy/U6c+Lr5nIt7691bXOwcyfmDmy4e9SwiAAwAGd/OGX5t889tSOa7KuKufrHNy7548LfwMyOXFVku8Ng+frtJ0wCABwQIvLa12bc9h0/nD+zr/6bD7z5XMX7r/hR16a3/pf/uYQR3R0uWZw8Gw6DwDA2LWZHwWXBsEk+cyXz+Xv/KvPDmlE8F02nQcAIMn4tZkfBZcGwb3qcCXZdB4AgCTn28xPXLWzc+jEVeXItpkHdjduqwGEQQCAAzrz1XPZuuSiwa0Xas581SwWjKNxWw0gDAIAHNBDjz3dV529TfTYorFXHa6kcdt0XjdRAIAD2qq1rzpwtG13DR2XbqLCIADAAZUk3WKfSayD2+qRo3vV4Uobp03nLRMFADiga6+Z6KsOMEqEQQCAA3r+21t91QFGiWWiAAAHdO01E/lWl+BnZhDG19JKxzWDAABtZ2YQ2mVppZNTp1cvbDzfWd/IqdOrSXIkA6FlogAAB9Srp4leJ4yKXi/2hYCDWVxeuxAEt21sbmVxeW1IIzoczwMAgAOaKN37hvaqw5X2z3/mtX3V2d3Z9Y2+6qNOGAQAOKC7br2xrzpcafMnZvJrP/PazExPpSSZmZ7Kr/3Ma4/kksZRcGx6qq/6qHPNIADAAb17/niS5KHHns5WrZkoJXfdeuOFOoyCcdoXb9gW5mZ3XDOYJFOTE1mYmx3iqA5OGAQAANiH7VDdmm6ipZTvS/IfkryoOf63a633lVJemeTDSV6a5A+T/N1a67dLKS9K8sEktyT5L0l+ptb6n5vvdSrJO5JsJfn7tdblpn57kn+RZCLJb9RaH2jqXc8xoH87AMChvHNpNR969KkL97dqvXDf7CCMp3Gaad3PNYN/keTHaq0/muS1SW4vpdyW5J8l+dVa681Jnsv5kJfm83O11r+a5Feb41JKeXWStyV5TZLbk/zLUspEKWUiya8neXOSVye5qzk2u5wDAGDoHnrs6b7qAKNkzzBYz/v/mruTzUdN8mNJfrupP5hkvrl9R3M/zdd/vJRSmvqHa61/UWv9T0meTPL65uPJWutXmlm/Dye5o3lMr3MAAAzdVu2+iUSvOsAo2Vc30WYG74+SfD3JI0m+nGS91vqd5pBnkmzPlc4keTpJmq9/I8nLLq5f8phe9Zftco5Lx3dPKeVMKeXMs88+u59/EgDAodlaAjjK9hUGa61btdbXJrkh52fy/lq3w5rP3f73qwOsdxvf+2qtJ2utJ6+//vpuhwAADJytJYCjrK99Bmut60n+fZLbkkyXUrYb0NyQ5Gxz+5kkNyZJ8/WXJDl3cf2Sx/Sq//ku5wAAAOAQ9gyDpZTrSynTze2pJP9Dki8k+XSSn24OuzvJx5vbDzf303z9U7XW2tTfVkp5UdMl9OYk/zHJ55LcXEp5ZSnlmpxvMvNw85he5wAAGDoNZICjbD/7DL48yYNN18+rknyk1vq7pZQ/TfLhUsq7k6wkeX9z/PuT/F+llCdzfkbwbUlSa32ilPKRJH+a5DtJfr7WupUkpZS/l2Q557eW+ECt9Ynme/3jHuelcDyKAAAZjklEQVQAABg6DWQG76qSvNDlx3eVyzBh4PYMg7XWP05yokv9Kzl//eCl9f+W5K09vtevJPmVLvVPJPnEfs8BADAKBJfB6/bz3K0OHFxf1wwCAPBdEz1CX686wCjZzzJRAAC62HyhvzoMw9JKJ4vLazm7vpFj01NZmJvN/ImuO7bRMsIgAACMqaWVTk6dXs3G5laSpLO+kVOnV5NEIMQyUQCAg7p2svtLqV51uNIWl9cuBMFtG5tbWVxeG9KIGCX+pwIAOKAXTU70VYcr7ez6Rl912kUYBAA4oPXnN/uqw5V2bHqqrzrtIgwCABzQNVd3fynVqw5X2sLcbKYumamempzIwtzskEbEKNFABgBaRmfBwfmL73RvG9qrDlfa/ImZnPnquTz02NPZqjUTpeQtt8z4nSeJmUEAaJWllU4WPvr5dNY3UnO+s+DCRz+fpZXOsIcGXAZLK5187PFOtmpNkmzVmo893vE7TxJhEABa5V0PP5HNF+qO2uYLNe96+IkhjQi4nHQTZTfCIAC0yPpGj4YnPerA0aabKLsRBgEAYEzpJspuhEEAaJGrSn914GjTTZTd6CYKAC1yyeWCe9aBo227a6gOwnRjZhAAWmSmx9KwXnUAxpcwCAAtYskYtMvSSienTq/u2E7m1OlVW0uQRBgEgFaZPzGT++88npnpqZScnxG8/87jlozBmLK1BLtxzSAAtMz8iRnhD1qi02MLiV512sXMIAAAjKmJ0r1VcK867WJmEABaZmmlo7MgtMRW7d4quFeddjEzCAAtsrTSycJHP7+jmcTCRz+vmQSMKR2E2Y0wCDBgSyudvOGBT+WV9/5e3vDAp7zIZqS86+EnsnnJpoKbL9S86+EnhjQi4HLSQZjdWCYKMEDbLby3O7dtt/BOYhkeI2F9Y7OvOnC02XSe3QiDAAO0Wwtvf3gBGAYdhOnFMlGAATrbo1V3rzoAwLAIgwADdKzHBfm96nClXXftZF91AMaXMAgwQC7UZ9T95N94eV91AMaXawYBBsiF+oy6T3/x2b7qAIwvYRBgwFyozyhzXSsA2ywTBYAWcV0rANuEQQBokYW52UxcVXbUJq4qrmsFaCFhEABa5MxXz2XrhbqjtvVCzZmvnhvSiAAYFmEQAFrkocee7qsOwPjSQAYAWmSr1r7qwNG3tNLR5ZquhEEAABhTSyudnDq9mo3NrSRJZ30jp06vJolAiGWiAAAwrhaX1y4EwW0bm1tZXF4b0ogYJcIgAACMKXuLshthEAAAxpS9RdmNMAgAAGNqYW42U5MTO2pTkxP2FiWJBjIAADC2tpvE6CZKN8IgACNNS3SAw5k/MeP/TboSBgEYWVqiA8Dl45pBAEaWlugAcPkIgwCMLC3RB++6ayf7qgMwvoRBAEbWS6a6B5Redfb2k3/j5X3VARhfwiAAI+vb39nqq87efvfzX+urDsD4EgYBGFnPb77QV529rW9s9lUHYHwJgwAAjIwXXzPRVx04OGEQgJF17WT3P1O96sDR9/y3uy8D71UHDs5fUwBGVimlrzpw9NU+68DBCYMAjKxv9ZgJ6FUHAPZPGAQAYGT0mve3HgAGTxgEYGRN99hPsFcdOPosE4UrRxgEYGS966dek8mrds4HTF5V8q6fes2QRgQA4+PqYQ8AAHqZPzGTJFlcXsvZ9Y0cm57KwtzshToAcHBmBgEAAFrIzCAAI2tppZNTp1ezsXm+e2hnfSOnTq8midlBADgkM4MAjKzF5bULQXDbxuZWFpfXhjQiABgfwiAAI+vs+kZfdeDo+59ve0VfdeDg9gyDpZQbSymfLqV8oZTyRCnlF5r6S0spj5RSvtR8vq6pl1LKe0opT5ZS/riU8rqLvtfdzfFfKqXcfVH9llLKavOY95RSym7nAKAdjk1P9VUHAPZvPzOD30nyi7XWv5bktiQ/X0p5dZJ7k3yy1npzkk8295PkzUlubj7uSfLe5HywS3JfkluTvD7JfReFu/c2x24/7vam3uscALTAG191fV914Oj7rUef6qsOHNyeYbDW+rVa6x82t7+Z5AtJZpLckeTB5rAHk8w3t+9I8sF63qNJpkspL08yl+SRWuu5WutzSR5Jcnvzte+vtX621lqTfPCS79XtHAC0wO9+/mt91YGjz6bzcOX0dc1gKeWmJCeSPJbkh2qtX0vOB8YkP9gcNpPk6Yse9kxT263+TJd6djkHAC2wvrHZVx0A2L99h8FSyn+X5GNJ/kGt9b/udmiXWj1Afd9KKfeUUs6UUs48++yz/TwUAIARcs1Et5eGvevAwe0rDJZSJnM+CP5WrfV0U/6zZolnms9fb+rPJLnxooffkOTsHvUbutR3O8cOtdb31VpP1lpPXn+960gAxkXp8dqvVx04+l78ou7bYPeqAwe3n26iJcn7k3yh1vrPL/rSw0m2O4LeneTjF9Xf3nQVvS3JN5olnstJ3lRKua5pHPOmJMvN175ZSrmtOdfbL/le3c4BQAvUHutEetWBo2/9+R7Lw3vUgYPbz1ssb0jyd5OsllL+qKn9H0keSPKRUso7kjyV5K3N1z6R5CeSPJnk+SQ/myS11nOllF9O8rnmuF+qtZ5rbv9ckt9MMpXk95uP7HIOAFqgpPt1AyYGYXxNXzuZ57oEv+lrJ4cwGhhve4bBWuv/k95/d3+8y/E1yc/3+F4fSPKBLvUzSf56l/p/6XYOANpBV0FoHysC4Mrpq5soAABcTroIw5UjDAIAMDJ6LUezPBwGT1smAABGhuXhjLqllU4Wl9dydn0jx6ansjA3m/kTM3s/cAQJgwAAAPuwtNLJqdOr2djcSpJ01jdy6vRqkhzJQGiZKAAAI+Paye4vT3vV4UpaXF67EAS3bWxuZXF5bUgjOhy/VQAAjIwXTU70VYcr6ez6Rl/1UScMAjCypnrMBPSqA0efTecZZcemp/qqjzp/TQEYWX/xnRf6qgNH37XXdJ8B7FWHK2lhbjZTl8xST01OZGFudkgjOhwNZAAGbJy6jA3bCz3aB/aqA0fft7691VcdrqTtv+fj8ndeGAQYoHHrMgYA7DR/YmZs/qZbJgowQOPWZQwAGF/CIMAAjVuXMYArrVd/KH2jYPD8WgEM0Lh1GQO40q65unujmF514OCEQYABGrcuYwBXmgYycOUIgwADNH9iJm+5ZSYTpSRJJkrJW24ZnwvNAYDxIQwCDNDSSicfe7yTrXp+74OtWvOxxztZWukMeWQAADsJgwADpJsowOFcVfqrAwcnDAIMkG6iAIfzQu2vDhycMAgwQLqJAhzO9jXX+60DBycMAgzQG191fV91AHbavuZ6v3W40pZWOnnDA5/KK+/9vbzhgU8d6b4AVw97AADj5NNffLavOgA7XXftZJ57frNrHYZtaaWTU6dXL/QH6Kxv5NTp1SQ5kp3DzQwCDJBrBgEOp9cEoIlBRsG4NYoTBgEGyDWDAIfzjY3vnRXcrQ5X0ri96SsMAgzQwtxspiYndtSmJieyMDc7pBEBHC3eVGOUjdvzUxgEGKD5EzO5/87jmZmeSkkyMz2V++88fiSvIwAYBm+qMcrG7fmpgQzAgM2fmBH+AA5o+//PxeW1nF3fyLHpqSzMzfp/lZEwbs9PYRBgwN65tJqHHns6W7VmopTcdeuNeff88WEPC+DI8KYao2ycnp/CIMAAvXNpNR969KkL97dqvXBfIAQARolrBgEG6LcuCoL7qQPwvcZpU28YZWYGAQao1zZYtscC2J9x29QbRpmZQQAARsa4beoNo0wYBBigayZKX3UAdhq3Tb1hlAmDAAP04hd1X33fqw7ATuO2qTeMMmEQWs5F+oP13PObfdUB2GncNvWGUeatamgxF+kDMGrGbVNvGGXCILTYbhfp+6MLwLCM06beMMosE4UWc5E+AEB7CYPQYi7SBwBoL2EQWsxF+gAA7eWaQWgxF+kDALSXMAgt5yJ9AIB2skwUAACghcwMQsstrXQsEwUAaCFhEFrMpvMAAO1lmSi02G6bzgMAMN7MDEKL2XQe4HDe8CMvzWe+fK5rnYNzCQNcGWYGocVsOg9wOG89+YqUS2qlqXMw25cwdNY3UvPdSxiWVjrDHhqMHWEQWmxhbjaTV+18GTN5VbHpPMA+LS6vpV5Sq02dg3EJA1w5wiC0Xbe3tAHYF8vtB8/PFK4cYRBabHF5LZtbO9/T3tyq3n0F2CfL7QfPzxSuHGEQWsy7rwCH88ZXXd9Xnb0tzM1manJiR21qcsIlDHAZCIPQYt59BTicT3/x2b7q7G3+xEzuv/N4ZqanUpLMTE/l/juP6yYKl4GtJaDFFuZmd2w6n3j3FaAfVlhcHvMnZoQ/uALMDEKLefcV4HCssACOMjOD0HLefQU4OCssgKNMGISWW1rpZHF5LWfXN3JseioLc7PCIcA+bf9/6f9R4CgSBqHFllY6O97R7qxv5NTp1STxQgZgn6ywAI4q1wxCiy0ur+1Y2pQkG5tb9hkEAGgBYRBarNOj212vOgAA40MYhBabKKWvOgAA42PPMFhK+UAp5eullD+5qPbSUsojpZQvNZ+va+qllPKeUsqTpZQ/LqW87qLH3N0c/6VSyt0X1W8ppaw2j3lPKedfhfY6BzA4W7X2VQcAYHzsZ2bwN5Pcfknt3iSfrLXenOSTzf0keXOSm5uPe5K8Nzkf7JLcl+TWJK9Pct9F4e69zbHbj7t9j3MAAzLTYx+sXnUAAMbHnmGw1vofkpy7pHxHkgeb2w8mmb+o/sF63qNJpkspL08yl+SRWuu5WutzSR5Jcnvzte+vtX621lqTfPCS79XtHMCALMzNZmpyYkfN/lgAAO1w0K0lfqjW+rUkqbV+rZTyg019JsnTFx33TFPbrf5Ml/pu5/gepZR7cn52Ma94xSsO+E+C9rE/FgBAew26gUy3rhP1APW+1FrfV2s9WWs9ef311/f7cAAAgNY5aBj8s2aJZ5rPX2/qzyS58aLjbkhydo/6DV3qu50DGJCllU5+8aOfT2d9IzXnt5T4xY9+PksrnWEPDQCAy+ygYfDhJNsdQe9O8vGL6m9vuoreluQbzVLP5SRvKqVc1zSOeVOS5eZr3yyl3NZ0EX37Jd+r2zmAAfknv7OarRd2TsZvvVDzT35ndUgjAi63XhvH2FAGoH32vGawlPJQkv8+yQ+UUp7J+a6gDyT5SCnlHUmeSvLW5vBPJPmJJE8meT7JzyZJrfVcKeWXk3yuOe6Xaq3bTWl+Luc7lk4l+f3mI7ucAxiQb317q686cPT1uhbDhjIA7bNnGKy13tXjSz/e5dia5Od7fJ8PJPlAl/qZJH+9S/2/dDsHAHBwJd2Dn5lBgPYZdAMZAGCEmRkEYJswCAAA0ELCILTYROm+MKxXHQCA8XHQTeeBMbBVuy8M61UH4HstrXSyuLyWs+sbOTY9lYW52cyfmBn2sAD2JAxCi02U0jX4mRkE2J+llU5OnV7Nxub5Lsyd9Y2cOn1+ex6BEBh1lolCi5kZBDicxeW1C0Fw28bmVhaX14Y0IoD9EwYBAA7o7PpGX3WAUSIMAkCL9FoEbnH4wRybnuqrDjBKhEEAaBH7DA7WwtxspiYndtSmJieyMDc7pBEB7J8GMgAAB7TdJEY3UeAoEgahxXQThfYp6T4L6Lf+4OZPzAh/wJFkmSi0mG6ijLoXXzPRV529WSYKwDZhEFqs1wygmUFGxeRE9z9TversbaZHY5NedQDGl7+m0GJmBgfvumsn+6qzu29sbPZVZ28angCwTRiEFjNDMHivfvlf6qvO7rTtH7z5EzO5/87jmZmeSsn53/f77zzumjeAFtJABlrsja+6Ph969KmudQ7m0a8811ed3d30sql0umzefdPLhMHD0PAEgMTMILTap7/4bF919mbp7WAJ1wBw+QiD0GJnu8y47FZnb71a72jJczDCNQBcPpaJXmZLKx0b0TKyjk13X4LneqyDu/aaiXzr21td6/SvlKRb7tPwFgAOTxi8jJZWOjl1ejUbm+dfGHbWN3Lq9GqSCISH8M6l1Tz02NPZqjUTpeSuW2/Mu+ePD3tYR9LC3OyO52iiq+BhPd8lCO5WZ3dTV1+V5zdf6FoHAA7HX9PLaHF5bceL7CTZ2NzK4vLakEZ09L1zaTUfevSpC0vEtmrNhx59Ku9cWh3yyI6m+RMzecstMxf2FZwoJW+5RWOJw9D9crC6BcHd6gDA/gmDl5HrsQbvocee7qvO7pZWOvnY450d4fpjj3eytNIZ8siOLnu4DdZEj/WgveoAwP4Jg5eRGYLB00xisMxeD978iZm87hUv2VF73SteYrb1gPzOA8DlIwxeRmYIBs8swWCZvR68dy6t5jNfPrej9pkvn7OU+YCmpyb7qgMA+ycMXkbzJ2Zy/53HMzM9lZJkZnoq99953AzBIdx164191dmd2evBs5R5sHq9z+P9HwA4PN1EL7P5E5pxDNJ211DdRAfjja+6Ph969KmudQ7GssbBWn9+s686ALB/wiBHzrvnjwt/A/LpLz7bVx2uNHthAsDlY5kotJhrBhl1rr0GgMtHGIQWc83g4M30+Nn1qrM7114DwOVjmSi02MLcbE6dXt2xvYRZl8PxMx08114DwOUhDEKLbb/AXlxey9n1jRybnsrC3KwX3ofgZwoAHBWljlmHu5MnT9YzZ84MexgAAABDUUp5vNZ6cq/jXDMIAADQQpaJXmZLKx3LxaBl/N4DAEeBMHgZLa10djSS6Kxv5NTp1STxwpCRIbgMlt97AOCosEz0MlpcXtvRUTBJNja3sri8NqQRwU7bwaWzvpGa7waXpZXOsId2ZPm9BwCOCjODl5ENvS8PM1mDs1tw8TM9GL/3AMBRYWbwMrKh9+CZyRoswWXwXjI12VcdAGBYhMHLaGFuNlOTEztqNp8+HEvwBssbFoNXSn91AIBhEQYvo/kTM7n/zuOZmZ5KSTIzPZX77zxu+d0hmMkaLG9YDN7685t91QEAhsU1g5fZ/IkZ4W+Ajk1PpdMl+JnJOpjt56ZrMAfHcxQAOCqEQY6UhbnZHW37EzNZh+UNi8HyHAUAjgphkCPFTBajznMUADgqSq112GMYqJMnT9YzZ84MexgAAABDUUp5vNZ6cq/jNJABAABoIWEQAACghYRBAACAFhIGAQAAWkgYBAAAaCFhEAAAoIWEQQAAgBYSBgEAAFpIGAQAAGghYRAAAKCFhEEAAIAWEgYBAABaSBgEAABoIWEQAACghYRBAACAFhIGAQAAWmjkw2Ap5fZSylop5clSyr3DHg8AAMA4GOkwWEqZSPLrSd6c5NVJ7iqlvHq4owIAADj6RjoMJnl9kidrrV+ptX47yYeT3DHkMQEAABx5ox4GZ5I8fdH9Z5raDqWUe0opZ0opZ5599tkrNjgAAICj6uphD2APpUutfk+h1vcleV+SlFKeLaV89XIPjJHwA0n+fNiDgF14jjLqPEcZdZ6jjLJRfn7+8H4OGvUw+EySGy+6f0OSs7s9oNZ6/WUdESOjlHKm1npy2OOAXjxHGXWeo4w6z1FG2Tg8P0d9mejnktxcSnllKeWaJG9L8vCQxwQAAHDkjfTMYK31O6WUv5dkOclEkg/UWp8Y8rAAAACOvJEOg0lSa/1Ekk8MexyMpPcNewCwB89RRp3nKKPOc5RRduSfn6XW7+nHAgAAwJgb9WsGAQAAuAyEQY6UUsqNpZRPl1K+UEp5opTyC8MeE3RTSpkopayUUn532GOBS5VSpkspv11K+WLz/+nfHPaY4GKllP+9+Tv/J6WUh0op3zfsMdFupZQPlFK+Xkr5k4tqLy2lPFJK+VLz+bphjvEghEGOmu8k+cVa619LcluSny+lvHrIY4JufiHJF4Y9COjhXyT5g1rrq5L8aDxXGSGllJkkfz/JyVrrX8/5JoJvG+6oIL+Z5PZLavcm+WSt9eYkn2zuHynCIEdKrfVrtdY/bG5/M+dfwMwMd1SwUynlhiQ/meQ3hj0WuFQp5fuT/K0k70+SWuu3a63rwx0VfI+rk0yVUq5Ocm322GcaLrda639Icu6S8h1JHmxuP5hk/ooOagCEQY6sUspNSU4keWy4I4Hv8WtJ/lGSF4Y9EOjiryR5Nsm/bpYy/0Yp5cXDHhRsq7V2kvyf/3879w8jQxyHYfz5JkeBUghRUKmpxDXilEKlIxdRS7Q0WpVWonYhci6hUCj0Cn8SCR1yNjlORaJSvIqZk9icYq/w2715Ps1upnq6zTv7mwFWgTXge5JnbaukTe1PsgbdHxbAvsY9E3MMaiZV1R7gEXAtyY/WPdKGqjoLrCd52bpF+oc54DhwJ8kx4CczeLRJ21f/3NV54AhwENhdVRfbVknbk2NQM6eqdtANwaUkK617pDHzwLmq+gQ8AE5X1b22SdJfRsAoycapimW6cShNizPAxyTfkvwCVoCTjZukzXytqgMA/ed6456JOQY1U6qq6J5zeZ/kduseaVyS60kOJTlM98KD50m8o62pkeQL8LmqjvaXFoB3DZOkcavAiara1f/uL+BLjjSdngCL/fdF4HHDli2Zax0gTWgeuAS8rao3/bUbSZ42bJKkWXMVWKqqncAH4HLjHumPJC+qahl4RfcW8dfA3bZVGrqqug+cAvZW1Qi4CdwCHlbVFbqbGBfaFW5NJWndIEmSJEn6zzwmKkmSJEkD5BiUJEmSpAFyDEqSJEnSADkGJUmSJGmAHIOSJEmSNECOQUmSJEkaIMegJEmSJA2QY1CSJEmSBug3xAIx302s2zUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1080x720 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.subplots(figsize=(15,10))\n",
    "plt.scatter(train['OverallQual'], np.expm1(train['SalePrice']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train set size: (1460, 81)\n",
      "Test set size: (1459, 80)\n",
      "START data processing 2019-12-19 22:36:19.364096\n",
      "(2917, 79)\n",
      "(2917, 86)\n",
      "(2917, 333)\n",
      "X (1458, 333) y (1458,) X_sub (1459, 333)\n",
      "X (1453, 331) y (1453,) X_sub (1459, 331)\n",
      "START ML 2019-12-19 22:36:20.733440\n",
      "TEST score on CV\n",
      "Kernel Ridge score: 0.1024 (0.0143)\n",
      " 2019-12-19 22:37:21.773836\n",
      "Lasso score: 0.1031 (0.0147)\n",
      " 2019-12-19 22:37:44.018447\n",
      "ElasticNet score: 0.1031 (0.0149)\n",
      " 2019-12-19 22:39:17.836134\n",
      "SVR score: 0.1023 (0.0133)\n",
      " 2019-12-19 22:39:33.158991\n",
      "START Fit\n",
      "2019-12-19 22:39:33.160991 elasticnet\n",
      "2019-12-19 22:39:43.185521 lasso\n",
      "2019-12-19 22:39:45.402078 ridge\n",
      "2019-12-19 22:39:51.828700 svr\n",
      "2019-12-19 22:39:53.475116 GradientBoosting\n"
     ]
    }
   ],
   "source": [
    "import numpy as np  # linear algebra\n",
    "import pandas as pd  #\n",
    "from datetime import datetime\n",
    "\n",
    "from scipy.stats import skew  # for some statistics\n",
    "from scipy.special import boxcox1p\n",
    "from scipy.stats import boxcox_normmax\n",
    "\n",
    "from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.preprocessing import RobustScaler\n",
    "from sklearn.model_selection import KFold, cross_val_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# from mlxtend.regressor import StackingCVRegressor\n",
    "\n",
    "from xgboost import XGBRegressor\n",
    "# from lightgbm import LGBMRegressor\n",
    "\n",
    "import os\n",
    "\n",
    "\n",
    "# print(os.listdir(\"../input\"))\n",
    "\n",
    "# Based on https://www.kaggle.com/hemingwei/top-2-from-laurenstc-on-house-price-prediction\n",
    "\n",
    "train = pd.read_csv('train.csv')\n",
    "test = pd.read_csv('test.csv')\n",
    "print(\"Train set size:\", train.shape)\n",
    "print(\"Test set size:\", test.shape)\n",
    "print('START data processing', datetime.now(), )\n",
    "\n",
    "train_ID = train['Id']\n",
    "test_ID = test['Id']\n",
    "# Now drop the  'Id' colum since it's unnecessary for  the prediction process.\n",
    "train.drop(['Id'], axis=1, inplace=True)\n",
    "test.drop(['Id'], axis=1, inplace=True)\n",
    "\n",
    "# Deleting outliers\n",
    "train = train[train.GrLivArea < 4500]\n",
    "train.reset_index(drop=True, inplace=True)\n",
    "\n",
    "# We use the numpy fuction log1p which  applies log(1+x) to all elements of the column\n",
    "train[\"SalePrice\"] = np.log1p(train[\"SalePrice\"])\n",
    "y = train.SalePrice.reset_index(drop=True)\n",
    "train_features = train.drop(['SalePrice'], axis=1)\n",
    "test_features = test\n",
    "\n",
    "features = pd.concat([train_features, test_features]).reset_index(drop=True)\n",
    "print(features.shape)\n",
    "# Some of the non-numeric predictors are stored as numbers; we convert them into strings \n",
    "features['MSSubClass'] = features['MSSubClass'].apply(str)\n",
    "features['YrSold'] = features['YrSold'].astype(str)\n",
    "features['MoSold'] = features['MoSold'].astype(str)\n",
    "\n",
    "features['Functional'] = features['Functional'].fillna('Typ')\n",
    "features['Electrical'] = features['Electrical'].fillna(\"SBrkr\")\n",
    "features['KitchenQual'] = features['KitchenQual'].fillna(\"TA\")\n",
    "features['Exterior1st'] = features['Exterior1st'].fillna(features['Exterior1st'].mode()[0])\n",
    "features['Exterior2nd'] = features['Exterior2nd'].fillna(features['Exterior2nd'].mode()[0])\n",
    "features['SaleType'] = features['SaleType'].fillna(features['SaleType'].mode()[0])\n",
    "\n",
    "features[\"PoolQC\"] = features[\"PoolQC\"].fillna(\"None\")\n",
    "\n",
    "for col in ('GarageYrBlt', 'GarageArea', 'GarageCars'):\n",
    "    features[col] = features[col].fillna(0)\n",
    "for col in ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']:\n",
    "    features[col] = features[col].fillna('None')\n",
    "for col in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2'):\n",
    "    features[col] = features[col].fillna('None')\n",
    "\n",
    "features['MSZoning'] = features.groupby('MSSubClass')['MSZoning'].transform(lambda x: x.fillna(x.mode()[0]))\n",
    "\n",
    "objects = []\n",
    "for i in features.columns:\n",
    "    if features[i].dtype == object:\n",
    "        objects.append(i)\n",
    "\n",
    "features.update(features[objects].fillna('None'))\n",
    "\n",
    "features['LotFrontage'] = features.groupby('Neighborhood')['LotFrontage'].transform(lambda x: x.fillna(x.median()))\n",
    "\n",
    "# Filling in the rest of the NA's\n",
    "\n",
    "numeric_dtypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']\n",
    "numerics = []\n",
    "for i in features.columns:\n",
    "    if features[i].dtype in numeric_dtypes:\n",
    "        numerics.append(i)\n",
    "features.update(features[numerics].fillna(0))\n",
    "\n",
    "numeric_dtypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']\n",
    "numerics2 = []\n",
    "for i in features.columns:\n",
    "    if features[i].dtype in numeric_dtypes:\n",
    "        numerics2.append(i)\n",
    "\n",
    "skew_features = features[numerics2].apply(lambda x: skew(x)).sort_values(ascending=False)\n",
    "\n",
    "high_skew = skew_features[skew_features > 0.5]\n",
    "skew_index = high_skew.index\n",
    "\n",
    "for i in skew_index:\n",
    "    features[i] = boxcox1p(features[i], boxcox_normmax(features[i] + 1))\n",
    "\n",
    "features = features.drop(['Utilities', 'Street', 'PoolQC',], axis=1)\n",
    "\n",
    "features['YrBltAndRemod']=features['YearBuilt']+features['YearRemodAdd']\n",
    "features['TotalSF']=features['TotalBsmtSF'] + features['1stFlrSF'] + features['2ndFlrSF']\n",
    "\n",
    "features['Total_sqr_footage'] = (features['BsmtFinSF1'] + features['BsmtFinSF2'] +\n",
    "                                 features['1stFlrSF'] + features['2ndFlrSF'])\n",
    "\n",
    "features['Total_Bathrooms'] = (features['FullBath'] + (0.5 * features['HalfBath']) +\n",
    "                               features['BsmtFullBath'] + (0.5 * features['BsmtHalfBath']))\n",
    "\n",
    "features['Total_porch_sf'] = (features['OpenPorchSF'] + features['3SsnPorch'] +\n",
    "                              features['EnclosedPorch'] + features['ScreenPorch'] +\n",
    "                              features['WoodDeckSF'])\n",
    "\n",
    "# simplified features\n",
    "features['haspool'] = features['PoolArea'].apply(lambda x: 1 if x > 0 else 0)\n",
    "features['has2ndfloor'] = features['2ndFlrSF'].apply(lambda x: 1 if x > 0 else 0)\n",
    "features['hasgarage'] = features['GarageArea'].apply(lambda x: 1 if x > 0 else 0)\n",
    "features['hasbsmt'] = features['TotalBsmtSF'].apply(lambda x: 1 if x > 0 else 0)\n",
    "features['hasfireplace'] = features['Fireplaces'].apply(lambda x: 1 if x > 0 else 0)\n",
    "\n",
    "print(features.shape)\n",
    "final_features = pd.get_dummies(features).reset_index(drop=True)\n",
    "print(final_features.shape)\n",
    "\n",
    "X = final_features.iloc[:len(y), :]\n",
    "X_sub = final_features.iloc[len(X):, :]\n",
    "\n",
    "print('X', X.shape, 'y', y.shape, 'X_sub', X_sub.shape)\n",
    "\n",
    "outliers = [30, 88, 462, 631, 1322]\n",
    "X = X.drop(X.index[outliers])\n",
    "y = y.drop(y.index[outliers])\n",
    "\n",
    "overfit = []\n",
    "for i in X.columns:\n",
    "    counts = X[i].value_counts()\n",
    "    zeros = counts.iloc[0]\n",
    "    if zeros / len(X) * 100 > 99.94:\n",
    "        overfit.append(i)\n",
    "\n",
    "overfit = list(overfit)\n",
    "overfit.append('MSZoning_C (all)')\n",
    "\n",
    "X = X.drop(overfit, axis=1).copy()\n",
    "X_sub = X_sub.drop(overfit, axis=1).copy()\n",
    "\n",
    "print('X', X.shape, 'y', y.shape, 'X_sub', X_sub.shape)\n",
    "\n",
    "# ################## ML ########################################\n",
    "print('START ML', datetime.now(), )\n",
    "\n",
    "kfolds = KFold(n_splits=10, shuffle=True, random_state=42)\n",
    "\n",
    "\n",
    "# rmsle\n",
    "def rmsle(y, y_pred):\n",
    "    return np.sqrt(mean_squared_error(y, y_pred))\n",
    "\n",
    "\n",
    "# build our model scoring function\n",
    "def cv_rmse(model, X=X):\n",
    "    rmse = np.sqrt(-cross_val_score(model, X, y,\n",
    "                                    scoring=\"neg_mean_squared_error\",\n",
    "                                    cv=kfolds))\n",
    "    return (rmse)\n",
    "\n",
    "\n",
    "# setup models    \n",
    "alphas_alt = [14.5, 14.6, 14.7, 14.8, 14.9, 15, 15.1, 15.2, 15.3, 15.4, 15.5]\n",
    "alphas2 = [5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008]\n",
    "e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, \n",
    "            0.0005, 0.0006, 0.0007]\n",
    "e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]\n",
    "\n",
    "ridge = make_pipeline(RobustScaler(),\n",
    "                      RidgeCV(alphas=alphas_alt, cv=kfolds,))\n",
    "\n",
    "lasso = make_pipeline(RobustScaler(),\n",
    "                      LassoCV(max_iter=1e7, alphas=alphas2,\n",
    "                              random_state=42, cv=kfolds))\n",
    "\n",
    "elasticnet = make_pipeline(RobustScaler(),\n",
    "                           ElasticNetCV(max_iter=1e7, alphas=e_alphas,\n",
    "                                        cv=kfolds, random_state=42, l1_ratio=e_l1ratio))\n",
    "                                        \n",
    "svr = make_pipeline(RobustScaler(),\n",
    "                      SVR(C= 20, epsilon= 0.008, gamma=0.0003,))\n",
    "\n",
    "\n",
    "gbr = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,\n",
    "                                   max_depth=4, max_features='sqrt',\n",
    "                                   min_samples_leaf=15, min_samples_split=10, \n",
    "                                   loss='huber', random_state =42)\n",
    "                                   \n",
    "\n",
    "\"\"\"lightgbm = LGBMRegressor(objective='regression', \n",
    "                                       num_leaves=4,\n",
    "                                       learning_rate=0.01, \n",
    "                                       n_estimators=5000,\n",
    "                                       max_bin=200, \n",
    "                                       bagging_fraction=0.75,\n",
    "                                       bagging_freq=5, \n",
    "                                       bagging_seed=7,\n",
    "                                       feature_fraction=0.2,\n",
    "                                       feature_fraction_seed=7,\n",
    "                                       verbose=-1,\n",
    "                                       #min_data_in_leaf=2,\n",
    "                                       #min_sum_hessian_in_leaf=11\n",
    "                                       )\"\"\"\n",
    "                                       \n",
    "\n",
    "xgboost = XGBRegressor(learning_rate=0.01, n_estimators=3460,\n",
    "                                     max_depth=3, min_child_weight=0,\n",
    "                                     gamma=0, subsample=0.7,\n",
    "                                     colsample_bytree=0.7,\n",
    "                                     objective='reg:linear', nthread=-1,\n",
    "                                     scale_pos_weight=1, seed=27,\n",
    "                                     reg_alpha=0.00006, random_state=42)\n",
    "\n",
    "# stack\n",
    "# stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet,\n",
    "#                                             gbr, xgboost, lightgbm),\n",
    "#                                 meta_regressor=xgboost,\n",
    "#                                 use_features_in_secondary=True)\n",
    "                                \n",
    "\n",
    "print('TEST score on CV')\n",
    "\n",
    "score = cv_rmse(ridge)\n",
    "print(\"Kernel Ridge score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "score = cv_rmse(lasso)\n",
    "print(\"Lasso score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "score = cv_rmse(elasticnet)\n",
    "print(\"ElasticNet score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "score = cv_rmse(svr)\n",
    "print(\"SVR score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "# score = cv_rmse(lightgbm)\n",
    "# print(\"Lightgbm score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "# score = cv_rmse(gbr)\n",
    "# print(\"GradientBoosting score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "# score = cv_rmse(xgboost)\n",
    "# print(\"Xgboost score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()), datetime.now(), )\n",
    "\n",
    "\n",
    "print('START Fit')\n",
    "# print(datetime.now(), 'StackingCVRegressor')\n",
    "# stack_gen_model = stack_gen.fit(np.array(X), np.array(y))\n",
    "print(datetime.now(), 'elasticnet')\n",
    "elastic_model_full_data = elasticnet.fit(X, y)\n",
    "print(datetime.now(), 'lasso')\n",
    "lasso_model_full_data = lasso.fit(X, y)\n",
    "print(datetime.now(), 'ridge')\n",
    "ridge_model_full_data = ridge.fit(X, y)\n",
    "print(datetime.now(), 'svr')\n",
    "svr_model_full_data = svr.fit(X, y)\n",
    "print(datetime.now(), 'GradientBoosting')\n",
    "gbr_model_full_data = gbr.fit(X, y)\n",
    "# print(datetime.now(), 'xgboost')\n",
    "# xgb_model_full_data = xgboost.fit(X, y)\n",
    "# print(datetime.now(), 'lightgbm')\n",
    "# lgb_model_full_data = lightgbm.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSLE score on train data:\n",
      "7.821449129039631\n",
      "Predict submission 2019-12-19 22:48:39.682862\n",
      "Save submission 2019-12-19 22:48:42.351534\n"
     ]
    }
   ],
   "source": [
    "def blend_models_predict(X):\n",
    "    return ((0.1 * elastic_model_full_data.predict(X)) + \n",
    "            (0.05 * lasso_model_full_data.predict(X)) + \n",
    "            (0.1 * ridge_model_full_data.predict(X)) + \n",
    "            (0.1 * svr_model_full_data.predict(X)))\n",
    "#             (0.1 * gbr_model_full_data.predict(X)) + \n",
    "#             (0.15 * xgb_model_full_data.predict(X))）\n",
    "#             (0.1 * lgb_model_full_data.predict(X)) + \\\n",
    "#             (0.3 * stack_gen_model.predict(np.array(X))\n",
    "            \n",
    "            \n",
    "print('RMSLE score on train data:')\n",
    "print(rmsle(y, blend_models_predict(X)))\n",
    "\n",
    "print('Predict submission', datetime.now(),)\n",
    "submission = pd.read_csv(\"sample_submission.csv\")\n",
    "submission.iloc[:,1] = np.floor(np.expm1(blend_models_predict(X_sub)))\n",
    "\n",
    "# this kernel gave a score 0.114\n",
    "# let's up it by mixing with the top kernels\n",
    "\n",
    "# print('Blend with Top Kernals submissions', datetime.now(),)\n",
    "# sub_1 = pd.read_csv('../input/top-10-0-10943-stacking-mice-and-brutal-force/House_Prices_submit.csv')\n",
    "# sub_2 = pd.read_csv('../input/hybrid-svm-benchmark-approach-0-11180-lb-top-2/hybrid_solution.csv')\n",
    "# sub_3 = pd.read_csv('../input/lasso-model-for-regression-problem/lasso_sol22_Median.csv')\n",
    "#sub_4 = pd.read_csv('../input/all-you-need-is-pca-lb-0-11421-top-4/submission.csv')\n",
    "# sub_5 = pd.read_csv('../input/house-prices-solution-0-107-lb/submission.csv') # fork my kernel again)\n",
    "\n",
    "submission.iloc[:,1] = np.floor((0.25 * np.floor(np.expm1(blend_models_predict(X_sub))))\n",
    "#                                 (0.25 * sub_1.iloc[:,1]) + \n",
    "#                                 (0.25 * sub_2.iloc[:,1]) + \n",
    "#                                 (0.25 * sub_3.iloc[:,1]) \n",
    "                                #(0.15 * sub_4.iloc[:,1])  \n",
    "                                #(0.1 * sub_5.iloc[:,1])\n",
    "                                )\n",
    "\n",
    "\n",
    "# From https://www.kaggle.com/agehsbarg/top-10-0-10943-stacking-mice-and-brutal-force\n",
    "# Brutal approach to deal with predictions close to outer range \n",
    "q1 = submission['SalePrice'].quantile(0.0042)\n",
    "q2 = submission['SalePrice'].quantile(0.99)\n",
    "\n",
    "submission['SalePrice'] = submission['SalePrice'].apply(lambda x: x if x > q1 else x*0.77)\n",
    "submission['SalePrice'] = submission['SalePrice'].apply(lambda x: x if x < q2 else x*1.1)\n",
    "\n",
    "\n",
    "submission.to_csv(\"House_price_submission_v57.csv\", index=False)\n",
    "print('Save submission', datetime.now(),)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 2., 3.])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.floor([1,2,3])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
